{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Decision Trees: Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Creating Height Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "n = 200\n",
    "\n",
    "height_pop1_f = np.random.normal(loc=155, scale=4, size=n)\n",
    "height_pop1_m = np.random.normal(loc=175, scale=5, size=n)\n",
    "height_pop2_f = np.random.normal(loc=165, scale=15, size=n)\n",
    "height_pop2_m = np.random.normal(loc=185, scale=12, size=n)\n",
    "\n",
    "height_f = np.concatenate([height_pop1_f, height_pop2_f])\n",
    "height_m = np.concatenate([height_pop1_m, height_pop2_m])\n",
    "\n",
    "df_height = pd.DataFrame(\n",
    "    {\n",
    "        'Gender': [1 for i in range(height_f.size)] + [2 for i in range(height_m.size)],\n",
    "        'Height': np.concatenate((height_f, height_m))\n",
    "    }\n",
    ")\n",
    "\n",
    "# df_height['Gender (text)'] = df_height['Gender'].apply(lambda g: {1: 'F', 2: 'M'}.get(g, 'N/A'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>252</th>\n",
       "      <td>1</td>\n",
       "      <td>195.379347</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>716</th>\n",
       "      <td>2</td>\n",
       "      <td>181.509895</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>1</td>\n",
       "      <td>163.911061</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Gender      Height\n",
       "252       1  195.379347\n",
       "716       2  181.509895\n",
       "134       1  163.911061"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview three random rows; the fixed random_state keeps the same rows on every re-run.\n",
    "df_height.sample(3, random_state=22)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/tarek/anaconda3/envs/scikitbook/lib/python3.6/site-packages/ipykernel_launcher.py:17: UserWarning: Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqgAAAFNCAYAAADM2Z3wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deZxdZZng8d+ThCRYgQQQMKwBp6QHGEVQDAKyRNlapbUdwI+y6agoOsIwKtBK09gs0gqN2rIJsnREHHBBhWlZmzAtoMgiAeyKgUhYAgSskCILCc/8cU+Fm0ptN6l776l7f9/Ppz51znvec85z7pubPHnfc94TmYkkSZJUFmOaHYAkSZJUzQRVkiRJpWKCKkmSpFIxQZUkSVKpmKBKkiSpVExQJUmSVComqJJaWkRkRHy8xn2eiIiv1iumQc57TESsqNOxpxWfxV79rdfhfPsWx9+qHseX1NpMUCWVSkRcERG3DLCt5mQTmApct+6RrRHLLRFxxTDqHVPEnRHxWkR0R8QDEXFeRGzbp/q1wJYjHUPhSSqfxT3DPX4NcayIiGP6FP9Hcb6nR/p8klqfCaqklpaZz2bm0iaHsZJKsrYF8A7gTGA68HB1D2ZmLsnMBSN98ogYn5kri8/i1ZE+fn8yc3lxvtcacT5JrcUEVdKoFRGTIuKCiHgqIl6JiPsj4sN96qzW6xoR20XEryNiaUQ8GRHHR8QdEfH9PocfXxz7xYhYEBHnR8S44hhXADOAo6t6R/cdLNYiWXs2M7sy8/8A7wEeAq6IiLHFcVcb4o+IDSPiBxHxbEQsK+I9b7AYqobuPxYRN0ZED/D1QYb0p0XErRGxJCLmRsQRVefvd5+ImBMRpxfLTwBjgR/0xlGUrzHEHxHTI+LO4lwvRcQPI2Kzqu2nF8c+NCIei4ieom06B/tsJbUeE1RJo1JEBPAL4G3A4cDOwIXAjyJixiD7/BSYTCVB/ADw18Db+6n+BeAZ4F3F8ueBo4ttXwRmAT+m0jM6lcqQ9rBl5grgPODNA5wf4B+BXYFDgU4q1/noMGP4BjCTyudy0SChnAtcDuwC/BCYGREDxdOfd1LpIT6hKo41RMSbgF8D84HdqXz2O7Pm7RdTgc8CHwPeDWxQxCepjYxrdgCS1I99I2LxEHX2AfYANs/M7qLskoiYTiWhvLWffd5LJaHtzMw5AEXv6vx+6s7KzHOK5a6IOLbY/7LM7I6I5cCSzHy2pitb3ezi9/bA7/rZvi1wf2b23jf6Z4okdKAYKjk4ABdn5syq8mkDxHBZVb2vRsT+wP8CjhzOBWTm88U5u4f4LI4HFgHHZObyIqYjgQci4j2ZeWdRbwJwZGY+X9Q5F7gmIiaW4FYNSQ1igiqpjO7h9d7Kal1Vy+8ExgNPVSVlFGXV9artCLzQm5wCZOaLEfHHfuo+0Gf9aWC7IeKuVW/gOcD27wHXR8Q7qCTc/xf4t2He13nvMGP4TZ/1/0fl1oGRthNwd29yCpCZD0ZEd7GtN0F9ujc57V2n8jltRiVBl9QGTFAlldGS6iSyV59EdAzQTSVR7Wt5P2W9BkoGhzpGMvK3Re1U/J7b38bM/LeI2AY4ENgX+FfgDxExIzNXDnHsnhGIrzcRjj7l643AsQfS3+cO3pImtRW/8JJGq98BU4CJmTmnz89APW2PAJtGxJt7CyJiI+Ata3H+5VQeDlorxQNX/wuYA9w/UL3MfDEzr8nMz1C5X3YfKj3B6xxDYXqf9XdT+ZwAensyt6iKezPWnAprOHHMBqZHxPiqY72Nyv3AD9cYs6QWZw+qpNHqNuAW4CcR8WUqT8RvRCXBWpqZl/azzy3Ag8DVEfFFKonVmcAKht+z2utxYL8i2e2mcg/mgFM4FQ8JQeWhn12AE4H/Bhw80JB9RJwJ3EcluXuNyoNDi3l9qHuNGGq8BoBPRsRjVBL+j1O5r/cLUJn2KiL+H/Dl
os44Kp/Xsj7H6I3jJmB5Zr7Qz3m+S+XBrisi4iwq/7n4HpV7fWetRdySWpg9qJJGpcxM4IPAT4DzgceAX1HpZfzTIPt8iMrw9yzgl8BNwB+BWh/A+RbwApWE93lgz0HqjqUyI8DTVBLOrwJ3Aztn5l2D7LcUOKPY53fAW6kktL2JaC0xDORk4NNUEvwjgY9n5u+rtn+CSlL8H8CPgEuKa6l2ErAb8ASv97quppjf9QBgK+C3VD77h4GPrEXMklpcVP6+lqT2FBEbUHmK/6uZ+Z1mxyNJcohfUpuJiA9SGdJ/lMqT4X9PZXj/x82MS5L0OhNUSe3mDcBpwDQqQ/33AXvV4xWjkqS14xC/JEmSSsWHpCRJklQqo3aIv7u7265fSZKkUW7y5Ml9XwZiD6okSZLKxQRVkiRJpdKWCWpXV1ezQ1Cd2Laty7ZtbbZv67JtW1u92rchCWpEbB0Rt0fEIxExu3jFIBGxcUTcHBFdxe+NivKIiG9HxJyIeCgidm1EnJIkSWq+RvWgrgBOyswdgenA8RGxI5VX7N2amZ3ArcU6wMFAZ/HzaeDCBsUpSZKkJmvIU/yZ+QzFu5sz8+WIeBTYEjgU2LeodiVwB/CVovyq4r3Zd0fElIiYWhxHkiSpVDKTxYsX89prrzU7lIaaOHEi3d3dg9YZM2YMkyZNImKNh/UH1PCJ+iNiGnAnsDPw58ycUpQH8FJmTomIXwLnZOZdxbZbga9k5u96j1M9zZT3t0iSpGYaN24cm2yyCePHj292KKWzfPlyFi5cyIoVK1aVdXZ2rlrub5qphs6DGhGTgOuBEzJzUXUmnZkZEWuVLVdf5HB0dXXVvI9GB9u2ddm2rc32bV3t0rbd3d1suOGGzQ6j4ZYuXcrEiRMHrTNx4kQyk8mTJw/7uA17ij8i1qOSnM7MzJ8UxQsiYmqxfSrwXFH+FLB11e5bFWWSJElqcY16ij+Ay4BHM/O8qk03AEcXy0cDP68qP6p4mn860O39p5IkSQPbeOON2WuvvVb9zJs3r27nmjlzJl/60pfqdvxGDfHvCRwJ/CEiHijKTgXOAX4cEZ8E5gGHFdtuBA4B5gCvAMc2KE5JkqR1dvgtC0f0eNe+d5Mh66y//vrcddddI3reZmnUU/x3AQM9ujWjn/oJHF/XoCRJklrcypUrOf3007nrrrtYtmwZn/rUpzj22GOZNWsWZ599NpMnT+aRRx7hQx/6EDvuuCMXXXQRS5cuZebMmWy33XbcdNNNfPOb32T58uVsvPHGXHrppWy22WarneOFF17gxBNPZP78+QCcffbZTJ8+fZ3ibuhDUpLW3Uj/r7wshtM7IEka2JIlS9hrr70A2HbbbZk5cyZXX301G264IbfffjvLli3jwAMPZL/99gPg4Ycf5t5772WjjTZil1124cgjj+S2227jwgsv5OKLL+acc85hjz324JZbbiEiuOqqq7jgggs488wzVzvvySefzOc+9zn22GMPnnzySf72b/+We++9d52uxQRVkiSpBfQ3xH/bbbcxe/Zsfv7zymM+ixYtYu7cuay33nrsuuuuvOlNbwJg2rRp7L///gDsuOOOzJo1C4CnnnqKY489lgULFrB8+XK23XbbNc57xx138Nhjj61af/nll1m8eDGTJk1a62sxQZUkSWpRmcm5557LjBmr31E5a9YsJkyYsGp9zJgxq9bHjBnDypUrAfjyl7/M8ccfzyGHHMKsWbM455xz1jjHa6+9xi233DLkdFO1aNg0U5IkSWqsGTNmcNlll/Hqq68CMGfOHHp6eoa9/6JFi9hiiy0AuOaaa/qts//++3PJJZesWn/ooYfWIeIKE1RJkqQWddRRR/FXf/VX7LPPPuyxxx6ccMIJq73RaSgnn3wyRx99NPvssw+bbNL/swLf+MY3uP/++3n3u9/Nu971Ln7wgx+sc9wNf9XpSKl+1Wmt2uWtFu2oHdq2XR+Saoe2bWe2b+tql7bt7u6u6U1JrWI4b5KC
wT+f/l51ag+qJEmSSsUEVZIkSaVigipJkqRSMUGVJElSqZigSpIkqVRMUCVJklQqJqiSJEktYMqUKXz6059etb5ixQre/OY3c/jhhw+636xZs4as02i+6lSSJGmETTz/lBE93tITzx6yTkdHB4888ghLlixh/fXX5/bbb2fq1KkjGkej2IMqSZLUIg444AB+/etfA3DdddfxkY98ZNW2++67j/e9733svffeHHDAAXR1da2xf09PD8cffzz7778/e++9N7/61a8aFns1E1RJkqQW8eEPf5jrr7+epUuXMnv2bHbbbbdV2zo7O7npppuYNWsWp556KmecccYa+3/rW9/iPe95D7fddhu/+MUvOO200+jp6WnkJQAO8UuSJLWMnXfemT//+c9cd911HHDAAattW7RoEZ/97GeZO3cuEcGrr766xv633XYbN910E9/5zncAWLZsGfPnz2eHHXZoSPy9TFAlSZJayMEHH8zXvvY1fvnLX/Liiy+uKj/zzDPZe++9mTlzJvPmzeP973//GvtmJldddRWdnZ2NDHkNDvFLkiS1kI9//ON85StfYaeddlqtfNGiRasemvrhD3/Y774zZszgkksuITMBePDBB+sb7ABMUCVJklrIlltuyXHHHbdG+Re/+EXOOOMM9t57b1auXNnvvl/60pd49dVX2XPPPZk+fTpnnXVWvcPtV/RmyKNNd3f3Wgfe1dXV9K5r1Uc7tO3htyxsdgh1ce17Nxl0ezu0bTuzfVtXu7Rtd3c3kydPbnYYDbd06VImTpw4ZL3BPp/JkydH3zJ7UCVJklQqJqiSJEkqFRNUSZIklUpDEtSIuDwinouIh6vKro2IB4qfJyLigaJ8WkQsqdp2USNilCRJUjk0ah7UK4DvAlf1FmTm4b3LEfEtoLuq/p8yc5cGxSZJkrROxowZw/Llyxk/fnyzQymd5cuXM2ZMbX2iDUlQM/POiJjW37aICOAwYP9GxCJJkjTSJk2axOLFi1myZEmzQ2moRYsWseGGGw5aZ8yYMUyaNKmm45bhTVJ7Awsys6uqbLuIuB9YBHw1M2cNdoCurq7BNo/YPhodWr1texZPaHYIddHV9eIw6rR227Y727d12batbenSpTXvM9TUY2VIUD8KXFO1/gywTWYujIjdgJ9FxE6ZuWigA9Q6v1q7zMnWjtqhbTvmteY8qJ2d2wy6vR3atp3Zvq3Ltm1t9Wrfpj7FHxHjgA8D1/aWZeayzFxYLN8H/Al4S3MilCRJUqM1e5qp9wKPZeb83oKI2DQixhbL2wOdwNwmxSdJkqQGa9Q0U9cAvwF2iIj5EfHJYtMRrD68D/Ae4KFi2qnrgOMyc+ib0yRJktQSGvUU/0cHKD+mn7LrgevrHZMkSZLKqdlD/JIkSdJqTFAlSZJUKiaokiRJKhUTVEmSJJWKCaokSZJKxQRVkiRJpWKCKkmSpFIxQZUkSVKpmKBKkiSpVExQJUmSVComqJIkSSoVE1RJkiSVigmqJEmSSsUEVZIkSaVigipJkqRSMUGVJElSqZigSpIkqVRMUCVJklQqJqiSJEkqFRNUSZIklYoJqiRJkkrFBFWSJEmlYoIqSZKkUjFBlSRJUqk0JEGNiMsj4rmIeLiq7PSIeCoiHih+DqnadkpEzImIP0bEgY2IUZIkSeXQqB7UK4CD+ik/PzN3KX5uBIiIHYEjgJ2Kfb4XEWMbFKckSZKarCEJambeCbw4zOqHAj/KzGWZ+TgwB9i9bsFJkiSpVMY1+fyfj4ijgN8BJ2XmS8CWwN1VdeYXZQPq6uqq+cRrs49Gh1Zv257FE5odQl10dQ39f9hWb9t2Z/u2Ltu2ta1N+3Z2dg66vZkJ6oXA14Esfn8L+MTaHGioi+yrq6ur5n00OrRD23bMW9jsEOqis3ObQbe3Q9u2M9u3ddm2ra1e7du0p/gzc0FmrszM14BLeX0Y/ylg66qqWxVlkiRJagNNS1AjYmrV6oeA3if8bwCOiIgJEbEd
0Anc2+j4JEmS1BwNGeKPiGuAfYE3RsR84O+BfSNiFypD/E8AnwHIzNkR8WPgEWAFcHxmrmxEnJIkSWq+hiSomfnRfoovG6T+mcCZ9YtIkiRJZeWbpCRJklQqJqiSJEkqFRNUSZIklYoJqiRJkkrFBFWSJEmlYoIqSZKkUjFBlSRJUqmYoEqSJKlUTFAlSZJUKiaokiRJKhUTVEmSJJWKCaokSZJKxQRVkiRJpWKCKkmSpFIZ1+wAJEkDm3j+Kc0OYUhLTzy72SFIajH2oEqSJKlUTFAlSZJUKiaokiRJKhUTVEmSJJWKCaokSZJKxQRVkiRJpWKCKkmSpFIxQZUkSVKpmKBKkiSpVBqSoEbE5RHxXEQ8XFX2TxHxWEQ8FBE/jYgpRfm0iFgSEQ8UPxc1IkZJkiSVQ6N6UK8ADupTdjOwc2a+FfhPoPp9fn/KzF2Kn+MaFKMkSZJKoCEJambeCbzYp+zXmbmiWL0b2KoRsUiSJKncxjU7gMIngGur1reLiPuBRcBXM3PWYDt3dXXVfMK12UejQ6u3bc/iCc0OoS66ul4cRp3Wbtv+bN/T0+wQhjR3hNqlHdu3Xdi2rW1t2rezs3PQ7U1PUCPi74AVwMyi6Blgm8xcGBG7AT+LiJ0yc9FAxxjqIvvq6uqqeR+NDu3Qth3zFjY7hLro7Nxm0O3t0Lb9mdjR0ewQhjQS7dKu7dsObNvWVq/2bepT/BFxDPB+4GOZmQCZuSwzFxbL9wF/At7StCAlSZLUUMNOUCPiixHxxpE6cUQcBHwZ+GBmvlJVvmlEjC2Wtwc6gbkjdV5JkiSVWy09qPsDT0TELyPi8IgY9o1wEXEN8Btgh4iYHxGfBL4LbADc3Gc6qfcAD0XEA8B1wHGZOfTNaZIkSWoJw74HNTMPjYhNgCOAE4CLIuJ64KriKf3B9v1oP8WXDVD3euD64cYlSZKk1lLTPaiZuTAz/yUz9wD2Ad4J3B4RT0TE30XEpLpEKUmSpLZR80NSETEjIn4A3AEsAI4CjgTeDtw0otFJkiSp7Qx7iD8ivklleL8buIrK/KRPVW2/G3hpxCOUJElSW6llHtSJwIcy87f9bczMVyPiHSMTliRJktpVLQnq2cAr1QURsRGwfmY+DZCZj41gbJIkSWpDtdyD+jNgqz5lWwE/HblwJEmS1O5qSVB3yMw/VBcU6381siFJkiSpndWSoD4XEf+luqBYb80Xg0uSJKkpaklQLweuj4j3R8SOEfEBKm96+n59QpMkSVI7quUhqXOAV4FvAlsDT1JJTs+rQ1ySJElqU7W86vQ14J+KH0mSJKkuaulBJSJ2AN4GrPZK08y8fCSDkiRJUvuq5U1SpwKnAQ+y+nyoSeX+VEmSJGmd1dKDegKwe2Y+VK9gJEmSpFqe4l8C+KYoSZIk1VUtCerXgO9ExNSIGFP9U6/gJEmS1H5qGeK/ovj9P6rKgso9qGNHKiBJkiS1t1oS1O3qFoUkSZJUqGUe1HkAxZD+5pn5TN2ikiRJUtsa9v2jETElIn4ILAXmFGUfjIh/rFdwkiRJaj+1POB0EdANbAssL8p+Axw+0kFJkiSpfdVyD+oMYIvMfDUiEiAzn4+IzeoTmiRJktpRLT2o3cAbqwsiYhvAe1ElSZI0YmpJUL8PXB8R+wFjImIP4EoqQ/+SJEnSiKglQf0GcC3wL8B6wOXAz4ELhrNzRFweEc9FxMNVZRtHxM0R0VX83qgoj4j4dkTMiYiHImLXGuKUJEnSKDbsBDUrLsjMHTOzIzP/a2b+c2bmMA9xBXBQn7KTgVszsxO4tVgHOBjoLH4+DVw43DglSZI0ug37IamI2H+gbZl521D7Z+adETGtT/GhwL7F8pXAHcBXivKriuT37mKKq6nOvSpJktT6anmK/7I+65sC44H5wPZref7qCf+fBTYvlrcEnqyqN78oM0GVJElqcbW8SWq1V51GxFjgq8DLIxFIZmbv9FW16urqasg+Gh1avW17Fk9odgh1
0dX14jDqtHbb9mf7np5mhzCkuSPULu3Yvu3Ctm1ta9O+nZ2dg26vpQd1NZm5MiLOpNK7ed5aHmZB79B9REwFnivKnwK2rqq3VVHWr6Eusq+urq6a99Ho0A5t2zFvYbNDqIvOzm0G3d4ObdufiR0dzQ5hSCPRLu3avu3Atm1t9WrfWp7i78/7gNfWYf8bgKOL5aOpzArQW35U8TT/dKDb+08lSZLaQy0PST0JVA/BvwGYCHxumPtfQ+WBqDdGxHzg74FzgB9HxCeBecBhRfUbgUOAOcArwLHDjVOSJEmjWy1D/B/vs94D/GdmLhrOzpn50QE2zeinbgLH1xCbJEmSWkQtD0n9ez0DkSRJkqC2If6rWX2Iv1+ZedQ6RSRJkqS2VstDUn8B/gYYS+XJ/TFUJtT/C/Cnqh9JkiRprdVyD+pbgL/OzFm9BRGxF/C1zDxwxCOTJElSW6qlB3U6cHefsnuAPUYuHEmSJLW7WhLU+4GzImJ9gOL3mcAD9QhMkiRJ7amWBPUYYE+gOyIWAN3AXrw+0b4kSZK0zmqZZuoJ4N0RsTWwBfBMZv65XoFJkiSpPdXykBQRsQmVt0FNzcxzI2ILYExmzq9HcJKk8pt4/inrfIzte3qY2NExAtGsaemJZ9fluJLqZ9hD/BGxD/BH4GPA14riTuDCOsQlSZKkNlXLPaj/DByemQcBK4qye4DdRzwqSZIkta1aEtRpmXlrsdz7Rqnl1HibgCRJkjSYWhLURyKi74T87wX+MILxSJIkqc3V0vt5EvDLiPgVsH5EXAx8gMrrTiVJkqQRMewe1My8G3grMBu4HHgc2D0zf1un2CRJktSGhtWDGhFjgVuBAzPz3PqGJEmSpHY2rB7UzFwJbDfc+pIkSdLaqiXh/AfgwojYNiLGRsSY3p96BSdJkqT2U8tDUt8vfh/F69NMRbE8diSDkiRJUvsaMkGNiDdl5rNUhvglSZKkuhpOD+p/Ahtm5jyAiPhJZn64vmFJkiSpXQ3n/tHos75vHeKQJEmSgOElqDl0FUmSJGlkDGeIf1xE7MfrPal918nM2+oRnCRJktrPcBLU56i8OarXwj7rCWy/NiePiB2Aa6uKtgdOA6YAnwKeL8pPzcwb1+YckiRJGl2GTFAzc1q9Tp6ZfwR2gVVvq3oK+ClwLHB+Zn6zXueWJElSOZVpkv0ZwJ96ZwuQJElSeypTgnoEcE3V+ucj4qGIuDwiNmpWUJIkSWqsyGz+Q/oRMR54GtgpMxdExObAC1Tub/06MDUzP1G9T3d396rAu7q6Ghmu1FQnzp7Q7BDq4vydljU7hFLa/trvNDuEUW/u4V9odgiS+ujs7Fy1PHny5L5Tmtb0qtN6Ohj4fWYuAOj9DRARlwK/HGzn6oscjq6urpr30ejQDm3bMW9hs0Ooi87ObQbd3g5t25+JHR3NDqEhenp66KjTtbbjn5syadfvbruoV/uWZYj/o1QN70fE1KptHwIebnhEkiRJaoqm96BGRAfwPuAzVcXnRsQuVIb4n+izTZIkSS2s6QlqZvYAm/QpO7JJ4UiSJKnJyjLEL0mSJAEmqJIkSSoZE1RJkiSVigmqJEmSSsUEVZIkSaVigipJkqRSMUGVJElSqZigSpIkqVRMUCVJklQqJqiSJEkqFRNUSZIklYoJqiRJkkplXLMDkCSAw29ZOOj2nsUT6Jg3eJ0yuva9mzQ7BEkadexBlSRJUqmYoEqSJKlUTFAlSZJUKiaokiRJKhUTVEmSJJWKCaokSZJKxQRVkiRJpWKCKkmSpFIxQZUkSVKpmKBKkiSpVExQJUmSVCrjmh2AJEn1NPH8U5odwpCWnnh2s0OQSqUUCWpEPAG8DKwEVmTmOyJiY+BaYBrwBHBYZr7UrBglSZLUGGUa4t8vM3fJzHcU6ycDt2ZmJ3BrsS5JkqQWV4oe1AEcCuxbLF8J3AF8pVnBSGpNp912Vl2PP/EP4+t6fElqRZGZzY6BiHgceAlI4OLMvCQi/pKZ
U4rtAbzUuw7Q3d29KvCurq5Ghyw1zYmzJzQ7hJZy1m/Oa3YIo9JOG7zW7BBaytzDv9DsEKSG6uzsXLU8efLk6Lu9LD2oe2XmUxGxGXBzRDxWvTEzMyIGzKSrL3I4urq6at5Ho0M7tG3HvIXNDqEpehb30DGpY8SPO3bs2BE/Zjvo6Fh/RI/X09NDR8fIt+9o0cp/b7XD38vtrF7tW4p7UDPzqeL3c8BPgd2BBRExFaD4/VzzIpQkSVKjND1BjYiOiNigdxk4AHgYuAE4uqh2NPDz5kQoSZKkRirDEP/mwE8rt5kyDvhhZv7fiPgt8OOI+CQwDzisiTFKkiSpQZqeoGbmXOBt/ZQvBGY0PiJJkiQ1U9OH+CVJkqRqJqiSJEkqFRNUSZIklYoJqiRJkkrFBFWSJEmlYoIqSZKkUjFBlSRJUqmYoEqSJKlUTFAlSZJUKiaokiRJKhUTVEmSJJXKuGYHIElSu5t4/inNDmFQS088u9khqM3YgypJkqRSMUGVJElSqTjEr5Z1+C0Lmx2CJElaC/agSpIkqVRMUCVJklQqJqiSJEkqFRNUSZIklYoJqiRJkkrFBFWSJEmlYoIqSZKkUjFBlSRJUqmYoEqSJKlUmvomqYjYGrgK2BxI4JLMvCAiTgc+BTxfVD01M29sTpSSpL7ue375iB5v5coxjH1lZI+5NnbbdHyzQ5BE8191ugI4KTN/HxEbAPdFxM3FtvMz85tNjE2SJElN0NQENTOfAZ4pll+OiEeBLZsZkyRJkpqr2T2oq0TENODtwD3AnsDnI+Io4HdUellfGmjfrq6ums+3NvtodOht257FE5ociUZaz+KeET/mypUrR/yYWjtlaIuenpH/M9YK5q7jv5n+m9va1qZ9Ozs7B91eigQ1IiYB1wMnZOaiiLgQ+DqV+1K/DnwL+MRA+w91kX11dXXVvI9Gh+q27Zi3sMnRaCT1LO6hY1LHiB937NixI35M1W7lypWlaIuOjvWbHUIprcu/mf6b29rq1b5Nf4o/ItajkpzOzMyfAGTmgsxcmZmvAZcCuzczRkmSJDVOUxPUiAjgMuDRzDyvqnxqVbUPAQ83OjZJkiQ1R7OH+PcEjgT+EBEPFGWnAh+NiF2oDPE/AXymOeFJksgGRGsAAAuWSURBVCSp0Zr9FP9dQPSzyTlPJUmS2lTT70GVJEmSqpmgSpIkqVRMUCVJklQqJqiSJEkqFRNUSZIklUqzp5mSJKk07nt+ebNDqIvdNh3f7BCkmtiDKkmSpFIxQZUkSVKpmKBKkiSpVExQJUmSVComqJIkSSoVE1RJkiSVigmqJEmSSsV5UCVJ0qAmnn/KWu+7fU8PEzs6RjCa/i098ey6n0ONYw+qJEmSSsUeVEl1c9ptZ43YsVauXMnYsWNH7HiSWsu69PI2gj28tbEHVZIkSaVigipJkqRSMUGVJElSqXgPqjj8loXNDmHE9CyeQMe81rkeSRoJ9z2/vGnnXrlyDGNfqc/5d9t0fF2Oq+azB1WSJEmlYoIqSZKkUjFBlSRJUql4D6o0io3kPKOSJJVFqXtQI+KgiPhjRMyJiJObHY8kSZLqr7Q9qBExFvgX4H3AfOC3EXFDZj7S3MgkSZJqU/Y3XUG53nYVmdnsGPoVEXsAp2fmgcX6KQCZeTZAd3d3OQOXJEnSsE2ePDn6lpV5iH9L4Mmq9flFmSRJklpYmRNUSZIktaHS3oMKPAVsXbW+VVEG9N8dLEmSpNGvzD2ovwU6I2K7iBgPHAHc0OSYJEmSVGel7UHNzBUR8Xng34CxwOWZObvJYUmSJKnOytyDSmbemJlvycw3Z+aZw9knIi6PiOci4uGqsn+KiMci4qGI+GlETKnadkoxz+ofI+LAelyHRk5/7Vu17aSIyIh4Y7EeEfHton0fiohdGx+xhmugto2ILxTf39kRcW5Vud/dUWSAv5t3iYi7I+KBiPhdROxelPvdHUUiYuuIuD0iHim+p18syjeOiJsjoqv4vVFR
bvuOEoO0bd3zqlInqGvpCuCgPmU3Aztn5luB/wROAYiIHancOrBTsc/3ivlXVV5XsGb7EhFbAwcAf64qPhjoLH4+DVzYgPi09q6gT9tGxH7AocDbMnMn4JtFud/d0ecK1vzungv8Q2buApxWrIPf3dFmBXBSZu4ITAeOL76jJwO3ZmYncGuxDrbvaDJQ29Y9r2q5BDUz7wRe7FP268xcUazeTeWBK6j8w/ejzFyWmY8Dc4DdGxasatZf+xbOB74MVM+PeyhwVVbcDUyJiKkNCFNrYYC2/SxwTmYuK+o8V5T73R1lBmjfBDYslicDTxfLfndHkcx8JjN/Xyy/DDxKZVrIQ4Eri2pXAn9TLNu+o8RAbduIvKrlEtRh+ARwU7HsXKstICIOBZ7KzAf7bLJ9R7+3AHtHxD0R8e8R8c6i3LZtDScA/xQRT1LpHe991Y7tO0pFxDTg7cA9wOaZ+Uyx6Vlg82LZ9h2F+rRttbrkVW2VoEbE31Hprp7Z7Fg0MiLiDcCpVIYH1XrGARtTGVr6EvDjiHCKudbxWeDEzNwaOBG4rMnxaB1ExCTgeuCEzFxUvS0rr630DZCj1EBtW8+8qm0S1Ig4Bng/8LF8/f2ug861qlHhzcB2wIMR8QSVNvx9RLwJ27cVzAd+UgwF3gu8BrwR27ZVHA38pFj+P7w+FGj7jjIRsR6VBGZmZva26YLeofvid+8tOrbvKDJA29Y9r2qLBDUiDqJyf+IHM/OVqk03AEdExISI2I7KDdv3NiNGrZ3M/ENmbpaZ0zJzGpWEZtfMfJZK+x5VPDE6HeiuGm7S6PAzYD+AiHgLMB54Ab+7reJpYJ9ieX+gq1j2uzuKFKMalwGPZuZ5VZtuoPKfEIrfP68qt31HgYHathF5VWnnQV1bEXENsC/wxoiYD/w9lfuaJgA3F6ODd2fmcZk5OyJ+DDxCpYv6+Mxc2ZzINRz9tW9mDjQseCNwCJWbtF8Bjm1IkForA3x3LwcuL6YmWg4cXfxP3e/uKDNA+34KuCAixgFLqTzRDX53R5s9gSOBP0TEA0XZqcA5VG7L+SQwDzis2Gb7jh4Dte23qXNeFa/3ykqSJEnN1xZD/JIkSRo9TFAlSZJUKiaokiRJKhUTVEmSJJWKCaqklhARp0bE94dZ9/SI+Nd1PN+EiHikap7HiyLia+tyzHqr5TMqk4j4QkR8o9lxSGocn+KXVArFixb+R2beUlV2TFG21wif63Tgv2Tmx4cbSz91vgDslJnHjWRsIyUi9gX+NTO3GqruCJ0vgc7MnFOHY0+kMiXRrpn53FD1JY1+9qBK0to5Dri62UG0gmIe1AFl5lIq7/o+qjERSWo2E1RJo0ZEbBER10fE8xHxeET8z6ptqw3bR8RRETEvIhZGxNci4omIeG/V4cZHxFUR8XJEzI6IdxT7XQ1sA/wiIhZHxJf7iWMbYHvgnqqyKyLiH4vlfSNifkScFBHPRcQzETHgZOQRsXFE/CAino6IlyLiZ1Xb3h8RD0TEXyLiPyLirVXbnoiI/x0RD0VEd0RcGxETI6KDSkK3RXENi4vPbtVnFBHTIiIj4tiIeLI473ER8c7ieH+JiO/2ifMTEfFoUfffImLbovzOosqDxbkOH2bsX4mIh4CeiBhXrD9VtMkfI2JG1envAP56oM9QUmsxQZU0KkTEGOAXwIPAlsAM4ISIOLCfujsC3wM+BkwFJhf7VPsg8CNgCpXX830XIDOPBP4MfCAzJ2Xmuf2E89+AuZm5YpCQ31R13k8C/xIRGw1Q92rgDcBOwGbA+cV1vJ3K27Q+A2wCXAzcEBETqvY9DDgI2A54K3BMZvYABwNPF9cwKTOfHuDc76LyOsLDgX8G/g54bxHLYRGxTxHLoVTeIPNhYFNgFnANQGa+pzjW24pzXTvM2D9KJemcArwZ+DzwzszcADgQeKKq7qPA2wa4BkktxgRVUpn8rOht+0tE/IVKktnrncCmmXlGZi7PzLnApcAR/RznI8Av
MvOuzFwOnAb0veH+rsy8sXgN39XUlvxMAV4eos6rwBmZ+Wpm3ggsBnboW6l4yOpg4LjMfKmo/+/F5k8DF2fmPZm5MjOvBJYB06sO8e3MfDozX6SSwO9Sw3UAfD0zl2bmr4Ee4JrMfC4zn6KShL69qHcccHZmPlok5mcBu/T2ovZjuLE/mZlLgJVUXp24Y0Ssl5lPZOafquq+TCXhl9QGTFAllcnfZOaU3h/gc1XbtqUyZF2dwJ4KbN7PcbYAnuxdycxXgIV96jxbtfwKMHGoeyGrvARsMESdhX16WF8BJvVTb2vgxcx8qZ9t2wIn9bnmralcX6++19HfOQazoGp5ST/rvcfbFrigKo4XgWDNnulaYq9uoznACcDpwHMR8aOIqK67AdBd47VJGqVMUCWNFk8Cj1cnsJm5QWYe0k/dZ4BVT69HxPpUhpmHa6jpTR4CtqshoR3Mk8DGETFlgG1n9rnmN2TmNcM47khP0fIk8Jk+sayfmf8xSP2hYl8txsz8YTFjw7bFtuqppf4rlds7JLUBE1RJo8W9wMvFgzTrR8TYiNg5It7ZT93rgA9ExLsjYjyVXrmo4VwLqDwE1a/MnE9l2qPdazjmQMd6hsoDTd+LiI0iYr2I6L2n81LguIh4V1R0RMRfR8RQvbe917BJRIzUsPhFwCkRsRNAREyOiP/e53zVn1lNsUfEDhGxf3GP6lIqvbevVVXZh8rnJKkNmKBKGhWKe0XfT+Uey8eBF4Dv0899iZk5G/gClYegnqFy/+dzVO6BHI6zga8WQ9P/e4A6FwNH1nINgziSyj2rj1GJ8wSAzPwd8CkqD3C9RCUpPmY4B8zMx6g8xDS3uI4thtpniOP9lEqP5o8iYhHwMJV7Z3udDlxZnOuwtYh9AnAOlXZ9lsrDYqfAqnlQDwGuXJdrkDR6OFG/pJYXEZOAv1CZSP7xETrmBOB+YEbRC6o6icpLEbbOzDWm/JLUmkxQJbWkiPgAcCuVof1vUZlOadf0Lz1JKj2H+CW1qkOBp4ufTuAIk1NJGh3sQZUkSVKp2IMqSZKkUjFBlSRJUqmYoEqSJKlUTFAlSZJUKiaokiRJKhUTVEmSJJXK/wcQjcgXyNrI0AAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 720x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig, ax = plt.subplots(1, 1, figsize=(10, 5))\n",
    "\n",
    "# Overlay one semi-transparent histogram per gender code on the same axes.\n",
    "for gender_code, gender_label in [(1, 'Female'), (2, 'Male')]:\n",
    "    # .loc with a boolean mask selects rows and column in one step (no chained indexing).\n",
    "    df_height.loc[df_height['Gender'] == gender_code, 'Height'].plot(\n",
    "        label=gender_label, kind='hist',\n",
    "        bins=10, alpha=0.7, ax=ax\n",
    "    )\n",
    "\n",
    "ax.legend()\n",
    "ax.set_title('Height Distribution')\n",
    "ax.set_xlabel('Height (in centimeters)')\n",
    "\n",
    "# plt.show() renders through the active backend; fig.show() needs a GUI backend and\n",
    "# only emits a UserWarning under the inline notebook backend.\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">Height</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>median</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gender</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>160.9</td>\n",
       "      <td>157.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>179.4</td>\n",
       "      <td>177.3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Height       \n",
       "         mean median\n",
       "Gender              \n",
       "1       160.9  157.0\n",
       "2       179.4  177.3"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean and median height per gender code, rounded to one decimal place.\n",
    "# Aggregations are named by string so pandas uses its built-in groupby reductions\n",
    "# (passing NumPy callables such as np.mean here triggers a FutureWarning in recent pandas).\n",
    "df_height.groupby('Gender')[['Height']].agg(['mean', 'median']).round(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Splitting Data then Applying the Regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n",
       "                      max_features=None, max_leaf_nodes=None,\n",
       "                      min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "                      min_samples_leaf=1, min_samples_split=2,\n",
       "                      min_weight_fraction_leaf=0.0, presort='deprecated',\n",
       "                      random_state=None, splitter='best')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "\n",
    "# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.\n",
    "df_train, df_test = train_test_split(df_height, test_size=0.3, random_state=22)\n",
    "\n",
    "# Gender is the only feature (kept as a one-column DataFrame); Height is the target.\n",
    "x_train, y_train = df_train[['Gender']], df_train['Height']\n",
    "x_test, y_test = df_test[['Gender']], df_test['Height']\n",
    "\n",
    "# Fit a regression tree with default settings; the fitted estimator is the cell's displayed output.\n",
    "clf = DecisionTreeRegressor()\n",
    "clf.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "criterion=mse:\n",
      "\n",
      "|--- Gender <= 1.5\n",
      "|   |--- value: [160.8]\n",
      "|--- Gender >  1.5\n",
      "|   |--- value: [179.7]\n",
      "\n",
      "MSE: 131.8\n",
      "MAE: 8.8\n",
      "\n",
      "criterion=mae:\n",
      "\n",
      "|--- Gender <= 1.5\n",
      "|   |--- value: [157.2]\n",
      "|--- Gender >  1.5\n",
      "|   |--- value: [177.7]\n",
      "\n",
      "MSE: 140.3\n",
      "MAE: 8.1\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
    "from sklearn.tree import export_text\n",
    "\n",
    "# Train one tree per split criterion, then print its rules and its errors on the test set.\n",
    "# NOTE(review): 'mse'/'mae' are the criterion names accepted by the scikit-learn version this\n",
    "# notebook was run with; later releases are believed to use different names - confirm before upgrading.\n",
    "for criterion in ('mse', 'mae'):\n",
    "    rgrsr = DecisionTreeRegressor(criterion=criterion)\n",
    "    rgrsr.fit(x_train, y_train)\n",
    "\n",
    "    print(f'\\ncriterion={criterion}:\\n')\n",
    "    print(export_text(rgrsr, feature_names=['Gender'], spacing=3, decimals=1))\n",
    "\n",
    "    y_test_pred = rgrsr.predict(x_test)\n",
    "\n",
    "    # Report both error metrics for this tree, rounded to one decimal place.\n",
    "    for metric_label, metric_fn in (('MSE:', mean_squared_error), ('MAE:', mean_absolute_error)):\n",
    "        print(metric_label, round(metric_fn(y_test, y_test_pred), 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "230    189.715860\n",
       "140    149.980960\n",
       "172    150.570729\n",
       "782    193.298542\n",
       "406    172.337001\n",
       "          ...    \n",
       "491    172.981764\n",
       "502    180.858260\n",
       "358    167.625938\n",
       "356    157.478166\n",
       "132    154.342254\n",
       "Name: Height, Length: 560, dtype: float64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Emphasis on below 150:\n",
      "\n",
      "|--- Gender <= 1.5\n",
      "|   |--- value: [152.7]\n",
      "|--- Gender >  1.5\n",
      "|   |--- value: [179.7]\n",
      "\n",
      "MSE: 170.5\n",
      "MAE: 9.0\n",
      "\n",
      "Emphasis on above 150:\n",
      "\n",
      "|--- Gender <= 1.5\n",
      "|   |--- value: [162.6]\n",
      "|--- Gender >  1.5\n",
      "|   |--- value: [179.7]\n",
      "\n",
      "MSE: 132.7\n",
      "MAE: 9.1\n"
     ]
    }
   ],
   "source": [
    "# Height cut-off separating the two emphasised groups, and the weight given to samples in\n",
    "# the emphasised group (every other sample gets weight 1).\n",
    "HEIGHT_THRESHOLD = 150\n",
    "EMPHASIS_WEIGHT = 10\n",
    "\n",
    "# Refit the same MSE tree twice, each time up-weighting one side of the threshold, to show\n",
    "# how sample weights shift the fitted leaf values and the resulting test errors.\n",
    "emphasis_masks = [\n",
    "    (f'below {HEIGHT_THRESHOLD}', y_train <= HEIGHT_THRESHOLD),\n",
    "    (f'above {HEIGHT_THRESHOLD}', y_train > HEIGHT_THRESHOLD),\n",
    "]\n",
    "\n",
    "for who_gets_more_weight, is_emphasised in emphasis_masks:\n",
    "    # Vectorised weights: EMPHASIS_WEIGHT where the mask holds, 1 elsewhere.\n",
    "    sample_weight = np.where(is_emphasised, EMPHASIS_WEIGHT, 1)\n",
    "\n",
    "    rgrsr = DecisionTreeRegressor(criterion='mse')\n",
    "    rgrsr.fit(x_train, y_train, sample_weight=sample_weight)\n",
    "\n",
    "    print(f'\\nEmphasis on {who_gets_more_weight}:\\n')\n",
    "    print(export_text(rgrsr, feature_names=['Gender'], spacing=3, decimals=1))\n",
    "\n",
    "    y_test_pred = rgrsr.predict(x_test)\n",
    "\n",
    "    print('MSE:', round(mean_squared_error(y_test, y_test_pred), 1))\n",
    "    print('MAE:', round(mean_absolute_error(y_test, y_test_pred), 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
